#---------------------------------------------------------------------------------#
### information: 
#---------------------------------------------------------------------------------#
### authors: björn bremer (bremer@mpifg.de), reto bürgisser (buergisser@ipz.uzh.ch)
### last updated: 22 October 2021

#---------------------------------------------------------------------------------#
### description
#---------------------------------------------------------------------------------#

### this file replicates the analysis of the following paper:
### "Public Opinion on Welfare State Recalibration in Times of Austerity"
### this file focuses on the results from the SPLIT SAMPLE EXPERIMENT shown in the APPENDIX 

#---------------------------------------------------------------------------------#

set.seed(1234567)

#--------------------------------------------------------
### load libraries ###
#--------------------------------------------------------

library(grid)
library(gridExtra)
library(ggplot2)
library(ggpubr)
library(rstudioapi)
library(plyr)
library(dplyr)
library(stargazer)
library(Hmisc)

#--------------------------------------------------------
### set working directory and load data ###
#--------------------------------------------------------

setwd(dirname(getActiveDocumentContext()$path)) # set working directory to the location of this file
load("../data/df_split_psrm_replication.Rdata") # load the file

setwd("../figurestables/appendix") # set the wd for saving all figures

#--------------------------------------------------------
### table a.1: summary statistics ###
#--------------------------------------------------------

table(df_split$party_fam_rec)
df_split$party_fam_rec_num <- ifelse (df_split$party_fam_rec == "Far Left", 1,
                                      ifelse(df_split$party_fam_rec == "Center Left", 2,
                                             ifelse(df_split$party_fam_rec == "Center Right", 3,
                                                    ifelse(df_split$party_fam_rec == "Far Right", 4,
                                                           ifelse(df_split$party_fam_rec == "Others", 5,
                                                                  ifelse(df_split$party_fam_rec == "Abstention", 0, NA))))))

table(df_split$occup_gh)
df_split$occup_gh_num <- ifelse(df_split$occup_gh == "Employer", 1, 
                                ifelse(df_split$occup_gh == "Middle Class", 2,
                                       ifelse(df_split$occup_gh == "Working Class", 3,
                                              ifelse(df_split$occup_gh == "Routine Worker", 4, NA)))) 

table(df_split$workctr)
df_split$workctr_num <- ifelse(df_split$workctr == "Upscale", 1, 
                               ifelse(df_split$workctr == "Insider", 2,
                                      ifelse(df_split$workctr == "Outsider", 3,
                                             ifelse(df_split$workctr == "Out of Work", 4, NA)))) 


cols <- c("edu", "childcare", "almp", "split", "age", "female", "married", "children_u10",
          "education_c", "occup_gh_num", "income", "unemployed", "retired", "union", "party_fam_rec_num", "ideology", "workctr_num")
df_split[,cols] <- data.frame(apply(df_split[cols], 2, as.numeric))

stargazer(df_split[,c("edu", "childcare", "almp", "split", "age", "female", "married", "children_u10",
                      "education_c", "occup_gh_num", "income", "unemployed", "retired", "union", "party_fam_rec_num", "ideology", "workctr_num")],
          type="latex", out="table_a3.tex", font.size="footnotesize",
          title = "Summary Statistics", align=TRUE)



#--------------------------------------------------------
### figure a.1: share of respondents by income decile, pooled and by country   ### 
#--------------------------------------------------------

# pooled
bar_inc_rel <- ggplot(df_split, aes(x=income)) +
   geom_bar(aes(group=factor(0),y = ..prop..))  +  scale_x_continuous(breaks=c(1,2,3,4,5,6,7,8,9,10)) +
   ylab("Relative Frequencies") + xlab("Income decile") + theme_bw() +
   geom_hline(yintercept=0.10, linetype="solid", 
                color = "black", size=0.5)
bar_inc_rel
ggsave(bar_inc_rel, width = 18, height = 12, units = c("cm"), file ="figure_a1_increl.eps")


# by country
bar_inc_rel_cntry <- ggplot(df_split, aes(x=income)) +
   geom_bar(aes(group=factor(0),y = ..prop..))  +  scale_x_continuous(breaks=c(1,2,3,4,5,6,7,8,9,10)) +
   ylab("Relative Frequencies") + xlab("Income decile") + theme_bw() +
   geom_hline(yintercept=0.10, linetype="solid", color = "black", size=0.5) + 
   facet_wrap(~country, ncol = 2) 
bar_inc_rel_cntry
ggsave(bar_inc_rel_cntry, width = 18, height = 12, units = c("cm"), file ="figure_a1_increlcntry.eps")

   

#--------------------------------------------------------
### figure a.3: mean support for spending increases by treatment and country ###
#--------------------------------------------------------

## calculate average support and se by country

# education
df_split <- df_split[which (!is.na(df_split$split)),]

mean_edu_country <- ddply(df_split, c("split", "country"), summarise,
                          N = sum(!is.na(edu)),
                          mean = mean(edu,  na.rm=TRUE),
                          sd = sd(edu,  na.rm=TRUE),
                          se = sd / sqrt(N)
)

mean_edu_country$split <- as.factor(mean_edu_country$split)

# almp
mean_almp_country <- ddply(df_split, c("split", "country"), summarise,
                           N = sum(!is.na(almp)),
                           mean = mean(almp,  na.rm=TRUE),
                           sd = sd(almp,  na.rm=TRUE),
                           se = sd / sqrt(N)
)

mean_almp_country$split <- as.factor(mean_almp_country$split)

# childcare
mean_childcare_country <- ddply(df_split, c("split", "country"), summarise,
                                N = sum(!is.na(childcare)),
                                mean = mean(childcare,  na.rm=TRUE),
                                sd = sd(childcare,  na.rm=TRUE),
                                se = sd / sqrt(N)
)

mean_childcare_country$split <- as.factor(mean_childcare_country$split)

## plot the average with CIs by treatment and country

# education 
mean_edu_country$Country <- mean_edu_country$country

mean_edu_country$split <- factor(mean_edu_country$split, levels = c(1,3,2,4)) # re-order the graph
plot_mean_edu_country <- ggplot(mean_edu_country,aes(as.factor(split), mean, colour = Country)) +
      geom_errorbar(aes(ymin=mean-(1.41*se), ymax=mean+(1.41*se)), width=0, position = position_dodge(0.5)) + 
      geom_point(aes(y=mean, shape = Country), position = position_dodge(0.5), size=2) +
      scale_y_continuous(limits=c(3,9), breaks = round(seq(3, 9, by = 1), 1)) +
      ylab("Mean support for higher spending") +
      scale_x_discrete(name = "Treatment", breaks = c(1,2,3,4), c("Control", "Lower ALMP", "Lower pension", "Lower childcare")) +      
      scale_colour_grey(name = "Country", labels = c("DE", "IT", "UK"), start = 0.2, end = 0.8, aesthetics = "colour") +
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))  +
      ggtitle("Education Spending") + theme(plot.title = element_text(hjust = 0.5, face = "bold")) 
plot_mean_edu_country
ggsave(plot_mean_edu_country , width = 10, height = 10, units = c("cm"), file ="figure_a3_edu.eps")

# almp
mean_almp_country$Country <- mean_almp_country$country

mean_almp_country$split <- factor(mean_almp_country$split, levels = c(1,4,2,3)) # re-order the graph
plot_mean_almp_country <- ggplot(mean_almp_country,aes(split, mean, colour = Country)) +
   geom_errorbar(aes(ymin=mean-(1.41*se), ymax=mean+(1.41*se)), width=0, position = position_dodge(0.5)) + 
   geom_point(aes(y=mean, shape = Country), position = position_dodge(0.5), size=2) +
   scale_y_continuous(limits=c(3,9), breaks = round(seq(3, 9, by = 1), 1)) +
   ylab("Mean support for higher spending") +
   scale_x_discrete(name = "Treatment", breaks = c(1,2,3,4), c("Control", "Lower pension", "Lower childcare", "Lower PLMP")) +      
   scale_colour_grey(name = "Country", labels = c("DE", "IT", "UK"), start = 0.2, end = 0.8, aesthetics = "colour") +
   theme_bw() +
   theme(axis.text.x = element_text(angle = 45, hjust = 1))  +
   ggtitle("ALMP") + theme(plot.title = element_text(hjust = 0.5, face = "bold")) 
plot_mean_almp_country
ggsave(plot_mean_almp_country , width = 10, height = 10, units = c("cm"), file ="figure_a3_almp.eps")

# childcare
mean_childcare_country$Country <- mean_childcare_country$country

mean_childcare_country$split <- factor(mean_childcare_country$split, levels = c(1,2,3,4)) # re-order the graph
plot_mean_childcare_country <- ggplot(mean_childcare_country,aes(split, mean, colour = Country)) +
      geom_errorbar(aes(ymin=mean-(1.41*se), ymax=mean+(1.41*se)), width=0, position = position_dodge(0.5)) + 
      geom_point(aes(y=mean, shape = Country), position = position_dodge(0.5), size=2) +
      scale_y_continuous(limits=c(3,9), breaks = round(seq(3, 9, by = 1), 1)) +
      ylab("Mean support for higher spending") +
      scale_x_discrete(name = "Treatment", breaks = c(1,2,3,4), c("Control", "Lower child bf.", "Lower pension", "Lower PLMP")) +      
      theme_bw()  + 
      scale_colour_grey(name = "Country", labels = c("DE", "IT", "UK"), start = 0.2, end = 0.8, aesthetics = "colour") +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))  +
      ggtitle("Childcare") + theme(plot.title = element_text(hjust = 0.5, face = "bold"))
plot_mean_childcare_country
ggsave(plot_mean_childcare_country , width = 10, height = 10, units = c("cm"), file ="figure_a3_childcare.eps")

#--------------------------------------------------------
### figure a.4: share of supporters for spending increases by treatment and country ###
#--------------------------------------------------------

## education

# create new education binary support variable
df_split_bin <- df_split[which (!is.na(df_split$split)),]

df_split_bin$edu_bin <- ifelse(df_split$edu <= 5, 0,
                               ifelse(df_split$edu > 5, 1, NA))

df_split_bin$edu_bin <- df_split_bin$edu_bin*100 

# calculate means and se by country 
mean_edu_country_bin <- ddply(df_split_bin, c("split", "country"), summarise,
                              N = sum(!is.na(edu_bin)),
                              mean = mean(edu_bin,  na.rm=TRUE),
                              sd = sd(edu_bin,  na.rm=TRUE),
                              se = sd / sqrt(N)
)

mean_edu_country_bin$split <- as.factor(mean_edu_country_bin$split)

# bar plot with CIs by country
barplot_mean_edu_country_bin <- ggplot(mean_edu_country_bin, aes(split, mean, fill = country)) +
      geom_hline(yintercept = 50, colour = "gray50") + 
      geom_bar(aes(fill = country), stat="identity", position=position_dodge(), colour="black", size=.3) +
      geom_errorbar(aes(ymin=mean-(1.41*se), ymax=mean+(1.41*se)), width=.4, position=position_dodge(.9)) +
      scale_y_continuous(limits=c(0,100), breaks = round(seq(0, 100, by = 10), 1)) +
      ylab("Support for education spending (in %)") +
      scale_x_discrete(name = "Treatment", limits = c("1", "3", "2", "4"), labels = c("Control", "Lower pension", "Lower ALMP","Lower childcare")) +  
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
      scale_fill_manual(values=c("gray20","gray50","gray80")) +
      labs(fill = "Country") + 
      theme(legend.position="bottom")
barplot_mean_edu_country_bin

ggsave(barplot_mean_edu_country_bin, width = 20, height = 12, units = c("cm"), file ="figure_a4_edu.eps")

## almp

# create new almp binary support variable 
df_split_bin <- df_split[which (!is.na(df_split$split)),]

df_split_bin$almp_bin <- ifelse(df_split$almp <= 5, 0,
                                ifelse(df_split$almp > 5, 1, NA))

df_split_bin$almp_bin <- df_split_bin$almp_bin*100 

# calculate means and se by country 
mean_almp_country_bin <- ddply(df_split_bin, c("split", "country"), summarise,
                               N = sum(!is.na(almp_bin)),
                               mean = mean(almp_bin,  na.rm=TRUE),
                               sd = sd(almp_bin,  na.rm=TRUE),
                               se = sd / sqrt(N)
)

mean_almp_country_bin$split <- as.factor(mean_almp_country_bin$split)

# bar plot with CIs by country
barplot_mean_almp_country_bin <- ggplot(mean_almp_country_bin, aes(split, mean, fill = country)) +
      geom_hline(yintercept = 50, colour = "gray50") + 
      geom_bar(aes(fill = country), stat="identity", position=position_dodge(), colour="black", size=.3) +
      geom_errorbar(aes(ymin=mean-(1.41*se), ymax=mean+(1.41*se)), width=.4, position=position_dodge(.9)) +
      scale_y_continuous(limits=c(0,100), breaks = round(seq(0, 100, by = 10), 1)) +
      ylab("Support for ALMP spending (in %)") +
      scale_x_discrete(name = "Treatment", limits = c("1", "4", "2", "3"), labels = c("Control", "Lower PLMP", "Lower pension", "Lower childcare")) +  
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
      scale_fill_manual(values=c("gray20","gray50","gray80")) +
      labs(fill = "Country") + 
      theme(legend.position="bottom")
barplot_mean_almp_country_bin

ggsave(barplot_mean_almp_country_bin, width = 20, height = 12, units = c("cm"), file ="figure_a4_almp.eps")

## childcare

# create new education binary support variable 
df_split_bin <- df_split[which (!is.na(df_split$split)),]

df_split_bin$childcare_bin <- ifelse(df_split$childcare <= 5, 0,
                                     ifelse(df_split$childcare > 5, 1, NA))

df_split_bin$childcare_bin <- df_split_bin$childcare_bin*100 

# calculate means and se by country 
mean_childcare_country_bin <- ddply(df_split_bin, c("split", "country"), summarise,
                                    N = sum(!is.na(childcare_bin)),
                                    mean = mean(childcare_bin,  na.rm=TRUE),
                                    sd = sd(childcare_bin,  na.rm=TRUE),
                                    se = sd / sqrt(N)
)

mean_childcare_country_bin$split <- as.factor(mean_childcare_country_bin$split)

# bar plot with CIs by country
barplot_mean_childcare_country_bin <- ggplot(mean_childcare_country_bin, aes(split, mean, fill = country)) +
      geom_hline(yintercept = 50, colour = "gray50") + 
      geom_bar(aes(fill = country), stat="identity", position=position_dodge(), colour="black", size=.3) +
      geom_errorbar(aes(ymin=mean-(1.41*se), ymax=mean+(1.41*se)), width=.4, position=position_dodge(.9)) +
      scale_y_continuous(limits=c(0,100), breaks = round(seq(0, 100, by = 10), 1)) +
      ylab("Support for childcare spending (in %)") +
      scale_x_discrete(name = "Treatment", labels = c("Control", "Lower child bf.", "Lower pension", "Lower PLMP")) +  
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
      scale_fill_manual(values=c("gray20","gray50","gray80")) +
      labs(fill = "Country") + 
      theme(legend.position="bottom")
barplot_mean_childcare_country_bin

ggsave(barplot_mean_childcare_country_bin, width = 20, height = 12, units = c("cm"), file ="figure_a4_childcare.eps")

#--------------------------------------------------------
### figure a.7: share of supporters for spending increases by treatment with survey weights, pooled ###
#--------------------------------------------------------

## calculate mean and se with weights
mean_edu <- ddply(df_split, c("split"), summarise,
                  N = sum(!is.na(edu)),
                  mean = wtd.mean(edu,  wgt, na.rm=TRUE),
                  sd = sqrt(wtd.var(edu, wgt, na.rm=TRUE)),
                  se = sd / sqrt(N)
)

mean_edu$split <- as.factor(mean_edu$split)

mean_almp <- ddply(df_split, c("split"), summarise,
                   N = sum(!is.na(almp)),
                   mean = wtd.mean(almp,  wgt, na.rm=TRUE),
                   sd = sqrt(wtd.var(almp, wgt, na.rm=TRUE)),
                   se = sd / sqrt(N)
)

mean_almp$split <- as.factor(mean_almp$split)

mean_childcare <- ddply(df_split, c("split"), summarise,
                        N = sum(!is.na(childcare)),
                        mean = wtd.mean(childcare,  wgt, na.rm=TRUE),
                        sd = sqrt(wtd.var(childcare, wgt, na.rm=TRUE)),
                        se = sd / sqrt(N)
)

mean_childcare$split <- as.factor(mean_childcare$split)

## plot the graphs
plot_mean_edu_weights <- ggplot(mean_edu,aes(split, mean)) +
   geom_errorbar(aes(ymin=mean-(1.96*se), ymax=mean+(1.96*se)), width=0, color="grey50") + 
   geom_point(aes(y=mean),size=1.5) +
   scale_y_continuous(limits=c(3, 9), breaks = round(seq(3, 9, by = 1), 1)) +
   ylab("Mean support for spending (0-10 Scale)") +
   scale_x_discrete(name = "Treatment", labels = c("Control", "Lower ALMP", "Lower PLMP", "Lower childcare")) +      
   theme_bw()  +
   theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
   ggtitle("Education Spending") +theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
   coord_fixed(ratio = 1.1)
plot_mean_edu_weights
ggsave(plot_mean_edu_weights, width = 8, height = 12, units = c("cm"), file ="figure_a7_edu.eps")

plot_mean_almp_weights <- ggplot(mean_almp,aes(split, mean)) +
   geom_errorbar(aes(ymin=mean-(1.96*se), ymax=mean+(1.96*se)), width=0, color="grey50") + 
   geom_point(aes(y=mean),size=1.5) +
   scale_y_continuous(limits=c(3, 9), breaks = round(seq(3, 9, by = 1), 1)) +
   ylab("Mean support for spending (0-10 Scale)") +
   scale_x_discrete(name = "Treatment", labels = c("Control", "Lower pension", "Lower childcare", "Lower PLMP")) +      
   theme_bw()  +
   theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
   ggtitle("Active Labor Market Policy") +theme(plot.title = element_text(hjust = 0.5, face = "bold")) + 
   coord_fixed(ratio = 1.1)
plot_mean_almp_weights
ggsave(plot_mean_almp_weights, width = 8, height = 12, units = c("cm"), file ="figure_a7_almp.eps")

plot_mean_childcare_weights <- ggplot(mean_childcare,aes(split, mean)) +
   geom_errorbar(aes(ymin=mean-(1.96*se), ymax=mean+(1.96*se)), width=0, color="grey50") + 
   geom_point(aes(y=mean),size=1.5) +
   scale_y_continuous(limits=c(3, 9), breaks = round(seq(3, 9, by = 1), 1)) +
   ylab("Mean support for spending (0-10 Scale)") +
   scale_x_discrete(name = "Treatment", labels = c("Control", "Lower child bf.", "Lower pension", "Lower PLMP")) +
   theme_bw() +
   theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
   ggtitle("Childcare") +theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
   coord_fixed(ratio = 1.1)
plot_mean_childcare_weights
ggsave(plot_mean_childcare_weights, width = 8, height = 12, units = c("cm"), file ="figure_a7_childcare.eps")

